**** This program uses the CHIP 2002 rural sample to create Tables 1, 2, 4, 5, 6, 7 and 8.
**** The CHIP 2002 data are publicly available from http://www.icpsr.umich.edu/cocoon/ICPSR/STUDY/21741.xml.

* ---------------------------------------------------------------------------
* Session setup: empty memory, close any open log, set resource limits and
* declare the folders used by the rest of the program.
* ---------------------------------------------------------------------------
clear all
capture log close
set more off                  // do not pause for --more--; several tables below print long, unquieted output
set mem 200m
set mat 800

/* CHIP data folder */
global chip "E:\CHIPS\2002\Rural"

/* Output folder for the table6.xls file written by the Table 6 estout calls.
   The global macro result was referenced but never defined in this program,
   so the file path collapsed to \table6.xls. A value set by the caller before
   running this file is kept; otherwise the current working directory is used. */
if `"$result"' == "" {
    global result "."
}
* Load the CHIP household file and derive the household-level totals.
use $chip\h.dta, clear

* These three are taken before the missing-value recode below, so a missing
* input stays missing here.
generate hsize  = h1_85                 // household size
generate income = h1_500                // reported aggregate household net income
generate exp    = h1_600 + h1_617       // household living expenditure

* Alternative income definition: set missing (.) values in every h1_* variable
* to zero, then add up the non-wage components (the first two enter net of
* their _2 counterparts).
mvencode h1_*, mv(.= 0) override
generate nonwageinc = h1_401_1 - h1_401_2 + h1_402_1 - h1_402_2 ///
    + h1_403 + h1_404 + h1_405 + h1_406 + h1_407 + h1_408 + h1_409 + h1_410

* Sort on the household keys and save for the merge with the person-level data.
sort coun vill hous
save temp, replace

* Load the CHIP individual file. Every by-group statistic below relies on the
* data being sorted by county / village / household.
use $chip\p1.dta, clear
sort coun vill hous

* ---- Household income aggregates -------------------------------------------
* Missing (.) values in every p1_* variable become zero before anything is summed.
mvencode p1_*, mv(.= 0) override
by coun vill hous: egen hnewinc1 = sum(p1_43)        // wage income
by coun vill hous: egen hnewinc2 = sum(p1_57)        // non-wage income
by coun vill hous: egen hnewinc3 = sum(p1_63)        // main non-wage income
by coun vill hous: egen hnewinc4 = sum(p1_66)        // secondary non-wage income

* ---- Household head and spouse ---------------------------------------------
* Each max() runs over selector*value, so members who are not selected
* contribute 0; a household with nobody selected therefore gets 0.
by coun vill hous: egen hage = max((p1_3==1) * p1_5)                             // age of the household head
by coun vill hous: egen mage = max(((p1_3==1 | p1_3==2) & p1_4==2) * p1_5)       // mother's age: a head or spouse with p1_4==2
by coun vill hous: egen hsex = max((p1_3==1) * p1_4)                             // sex of the household head
by coun vill hous: egen hedu = max((p1_3==1) * p1_12)                            // years of schooling of the household head
by coun vill hous: egen hmin = max((p1_3==1 | p1_3==2) * (p1_8==1))              // head or spouse belongs to a minority
recode hmin (0 2=0)                                                              // 1 = minority, 0 = others

* ---- Household health --------------------------------------------------------
by coun vill hous: egen badhealth = max(p1_21==5)    // some family member is in bad health

* ---- Family demographics -----------------------------------------------------
by coun vill hous: egen hpop        = count(p1_2)                         // number of family members
by coun vill hous: egen childmaxage = max((p1_5<20) * p1_5)               // highest age among members younger than 20
by coun vill hous: egen son         = sum(p1_3==3 & p1_4==1 & p1_5<20)    // number of sons
by coun vill hous: egen daughter    = sum(p1_3==3 & p1_4==2 & p1_5<20)    // number of daughters
* Alternative child definition: selects on p1_9==2 instead of age below 20.
by coun vill hous: egen sona        = sum(p1_3==3 & p1_4==1 & p1_9==2)    // number of sons, alternative definition
by coun vill hous: egen daughtera   = sum(p1_3==3 & p1_4==2 & p1_9==2)    // number of daughters, alternative definition
by coun vill hous: egen age15       = max(p1_5>14 & p1_5<20)              // has a member aged 15-19
by coun vill hous: egen age10       = max(p1_5>9 & p1_5<15)               // has a member aged 10-14
by coun vill hous: egen age5        = max(p1_5>4 & p1_5<10)               // has a member aged 5-9
* Unmarried adult children. The condition selects members with age strictly
* between 25 and 35 and p1_6==2; it does not test the relationship code p1_3.
by coun vill hous: egen singleson   = max((p1_5>25 & p1_5<35) & p1_4==1 & p1_6==2)    // unmarried son over 25
by coun vill hous: egen singlegirl  = max((p1_5>25 & p1_5<35) & p1_4==2 & p1_6==2)    // unmarried daughter over 25

   **** aggregate the following variable into household level
* Keep one record per household. All listed variables are constant within a
* household (they were built with by-group egen), so (first) simply picks that
* value. childmaxage is written out in full: the original listed "child",
* which is not a variable at this point and only resolved to childmaxage
* through variable-name abbreviation (it fails under "set varabbrev off").
collapse (first) hage hsex hedu hmin hpop childmaxage son daughter sona daughtera mage badhealth singleson singlegirl hnewinc* age*, by(coun vill hous)

* Attach the household-level variables saved in temp (built from h.dta above)
* to the collapsed person-level aggregates, matching on the household keys.
* This is the old-style merge syntax; the master data are sorted on the keys
* by the preceding collapse, and temp was sorted before it was saved.
merge coun vill hous using temp                                          // merge household-level data with aggregated individual-level data
sort coun
drop _merge
rename coun code02

* Add the county-level sex-ratio data.
* NOTE(review): no variable named exactly "code" is created in this file; it
* presumably abbreviates code02 or is supplied by the sex-ratio file -- confirm.
* NOTE(review): the path is an absolute location on the author's machine and
* is not derived from the chip global; it must be adapted before running.
merge code using D:\dq\ShangJin\Writings\saving\jpe\Final\sexratio_1990.dta  // merge the sex ratio data, which is from the 1990 Population Census
drop _merge 

* cc is the number of non-missing code values within each code group; groups
* with a single record are dropped (described by the author as redundant
* county codes -- presumably sex-ratio rows without surveyed households).
sort code
by code: egen cc=count(code)                                              /* size of each county-code group */
drop if cc==1

       **** Define the interaction term between having a son and local sex ratio variables
* Indicator for having at least one son, and its interaction with the local
* sex ratio sr.
generate dson  = (son > 0)
generate sonsr = dson * sr

* ---- Income variables --------------------------------------------------------
* wageinc adds hnewinc1 and hnewinc2, the household sums of p1_43 and p1_57.
* NOTE(review): hnewinc2 is labelled non-wage income where it is built but is
* counted as wage income here -- confirm against the questionnaire.
generate wageinc = hnewinc1 + hnewinc2          // wage income aggregated from individual records
generate newinc  = nonwageinc + wageinc         // computed total income = non-wage + wage income
replace  newinc  = income if newinc < 0         // fall back to reported income when the computed value is negative
replace  income  = newinc if income < 1         // use computed income when reported income is below 1
generate pinc    = income / hpop                // per capita net income
generate loginc  = log(pinc)                    // per capita net income (log)
generate loginc2 = loginc * loginc

* ---- Income quartiles within county -----------------------------------------
* qinc takes the values 1-4; each pincK is sr for households in group K and 0
* otherwise. Presumably group 1 is the lowest-income quartile -- confirm the
* ordering used by the quantiles command.
bysort code: quantiles pinc [w=hpop], gen(qinc)  n(4)
generate pinc1 = sr * (qinc == 1)
generate pinc2 = sr * (qinc == 2)
generate pinc3 = sr * (qinc == 3)
generate pinc4 = sr * (qinc == 4)

* ---- Savings rate ------------------------------------------------------------
generate saving  = log(income / exp)            // log of income over expenditure
generate savingr = (income - exp) / income      // share of income not spent

* ---- Gini coefficient of per capita income, by county ------------------------
* Records without per capita income are removed from the data at this point.
drop if pinc == .
egen gini = inequal(pinc), by(code) weight(hpop) index(gini)


********** preparing samples for the regression analysis******

* nochild flags households whose childmaxage is 0. The original wrote
* child==0 here, one line before a variable named child exists, so Stata
* silently resolved it to childmaxage by abbreviation. The name is now spelled
* out; the values produced are unchanged.
* NOTE(review): if the intent was "no sons or daughters" (child==0 with child
* as defined on the next line), this statement must move below that line.
gen nochild=(childmaxage==0)
gen child=son+daughter                                    // number of children
gen childa=sona+daughtera                                 // number of children under the alternative definition
* Two-child households: exactly two children and a household size of 4.
gen twoson=(son==2 & child==2 & hsize==4)                 // two sons
gen twogirl=(daughter==2 & child==2 & hsize==4)           // two daughters
gen songirl=(daughter==1 & son==1 & child==2 & hsize==4)  // one son and one daughter
gen twochild = (twoson==1 | songirl==1 | twogirl==1)      // any two-child household

* Nuclear one-child households: exactly one child and a household size of 3.
generate oneson   = (son == 1 & child == 1 & hsize == 3)         // only child is a son
generate onegirl  = (daughter == 1 & child == 1 & hsize == 3)    // only child is a daughter
generate onechild = oneson + onegirl                             // either of the two

* The same three flags restricted to mother's age (mage) below 40, then below 45.
foreach cut of numlist 40 45 {
    generate oneson`cut'   = (oneson & mage < `cut')
    generate onegirl`cut'  = (onegirl & mage < `cut')
    generate onechild`cut' = oneson`cut' + onegirl`cut'
}

* Extended one-child households: exactly one child, no condition on hsize.
generate son1   = (son == 1 & child == 1)
generate girl1  = (daughter == 1 & child == 1)
generate child1 = son1 + girl1

* Extended-family flags restricted to mother's age below 40, then below 45.
foreach cut of numlist 40 45 {
    generate oneson1_`cut'   = (son1 & mage < `cut')
    generate onegirl1_`cut'  = (girl1 & mage < `cut')
    generate onechild1_`cut' = oneson1_`cut' + onegirl1_`cut'
}

        *** Alternative definition of one child family: no explicit marriage status for child
* Alternative one-child flags, built from the alternative child counts sona,
* daughtera and childa. The suffix "a" marks the alternative definition.
gen onesona=(sona==1 & childa==1 & hsize==3 )             // nuclear family with only 1 son
gen onegirla=(daughtera==1 & childa==1 & hsize==3  )      // nuclear family with only 1 daughter
gen onechilda = onesona+onegirla                          // nuclear one-child family
gen oneson40a=(onesona & mage<40)                         // nuclear family with 1 son and mother's age younger than 40
gen onegirl40a=(onegirla & mage<40)                       // nuclear family with 1 daughter and mother's age younger than 40
gen onechild40a=oneson40a+onegirl40a                      // nuclear family with 1 child and mother's age younger than 40
gen oneson45a=(onesona & mage<45)                         // nuclear family with 1 son and mother's age younger than 45
gen onegirl45a=(onegirla & mage<45)                       // nuclear family with 1 daughter and mother's age younger than 45
gen onechild45a=oneson45a+onegirl45a                      // nuclear family with 1 child and mother's age younger than 45

* Extended families under the alternative definition. The child count tested
* here is childa, matching onesona/onegirla above; the original tested child
* (the age-based count), mixing the two definitions in one flag.
* NOTE(review): this changes which households enter the "a" columns of the
* extended-family regressions -- confirm against the published tables.
gen son1a=(sona==1 & childa==1 )                           // extended family: one son
gen girl1a=(daughtera==1 & childa==1)                      // extended family: one daughter
gen child1a=son1a+girl1a                                   // extended family: one child

gen oneson1_40a=(son1a & mage<40)                          // extended family: one son and mother's age < 40
gen onegirl1_40a=(girl1a & mage<40)                        // extended family: one daughter and mother's age < 40
gen onechild1_40a=oneson1_40a+onegirl1_40a                 // extended family: one child and mother's age < 40
gen oneson1_45a=(son1a & mage<45)                          // extended family: one son and mother's age < 45
gen onegirl1_45a=(girl1a & mage<45)                        // extended family: one daughter and mother's age < 45
gen onechild1_45a=oneson1_45a+onegirl1_45a                 // extended family: one child and mother's age < 45


* Outcome for the multinomial logit in Table 2:
*   0 = no unmarried adult child, 1 = unmarried son only,
*   2 = unmarried daughter only,  3 = both.
generate single = 0
replace  single = 1 if singleson
replace  single = 2 if singlegirl
replace  single = 3 if singleson & singlegirl

  /* define outliers for whole sample */
* Whole-sample percentiles of the savings share (95th, 5th, 99th, 1st) and the
* two trimmed ranges derived from them.
foreach p of numlist 95 5 99 1 {
    egen outlier`p' = pctile(savingr), p(`p')
}
generate range1 = (savingr > outlier5 & savingr < outlier95)     // strictly inside the 5th-95th percentiles
generate range2 = (savingr > outlier1 & savingr < outlier99)     // strictly inside the 1st-99th percentiles

* The same percentiles and ranges computed separately within each subsample.
* The percentile variables are missing outside the subsample, which makes the
* range flags 0 there, so a range flag also implies subsample membership.
foreach x of varlist oneson onegirl onechild son1 girl1 oneson40 onegirl40 onechild40 ///
    oneson40a onegirl40a onechild40a oneson45 onegirl45 onechild45 oneson45a onegirl45a onechild45a ///
    oneson1_40 onegirl1_40 onechild1_40 oneson1_45 onegirl1_45 onechild1_45 ///
    oneson1_40a onegirl1_40a onechild1_40a oneson1_45a onegirl1_45a onechild1_45a {
    foreach p of numlist 95 5 99 1 {
        egen outlier`p'_`x' = pctile(savingr) if `x', p(`p')
    }
    generate range1_`x' = (savingr > outlier5_`x' & savingr < outlier95_`x')     // trim top and bottom 5%
    generate range2_`x' = (savingr > outlier1_`x' & savingr < outlier99_`x')     // trim top and bottom 1%
}

* Households with income or expenditure below 2000 yuan.
generate poor = (income < 2000 | exp < 2000)


  **** Define the motive of savings
* First and second most important savings motives as reported by the household.
generate motive1 = h2_4301
generate motive2 = h2_4302

* Renumber the motive codes. Codes after the recode:
*   1 children's wedding     2 children's education   3 bequest to children
*   4 building a house       5 retirement             6 medical expenses
*   7 other                  8 unclear
recode motive1 motive2 (1=5) (2=6) (3=2) (5=1) (6=3)

* Each indicator below is 100 when the motive is named first or second and 0
* otherwise, so its mean is a percentage. Households answering 8 (unclear) to
* both questions are set to missing and drop out of the means.

* Motives directly related to children (codes 1-3).
generate mc_s = 100 * (inlist(motive1, 1, 2, 3) | inlist(motive2, 1, 2, 3))
replace  mc_s = . if motive1 == 8 & motive2 == 8

* Motives not directly related to children (codes 4-6).
generate mcn_s = 100 * (inlist(motive1, 4, 5, 6) | inlist(motive2, 4, 5, 6))
replace  mcn_s = . if motive1 == 8 & motive2 == 8

* One indicator per motive code 1-7.
forvalues n = 1/7 {
    generate mt`n' = 100 * (motive1 == `n' | motive2 == `n')
    replace  mt`n' = . if motive1 == 8 & motive2 == 8
}


************* Table 1: Why do people save? Self-reported most or second most important reasons for saving (%)
* Means of the motive indicators; the first two tables are split by number of sons.
local motives mc_s mt1 mt2 mt3 mcn_s mt4 mt5 mt6 mt7

* One-child households (household size 3, head younger than 45)
tabstat `motives' if hsize==3 & hage<45 & onechild, by(son) stat(mean) col(stat)
* Two-child households (household size 4, head younger than 45)
tabstat `motives' if hsize==4 & hage<45 & twochild, by(son) stat(mean) col(stat)
* All households
tabstat `motives', stat(mean) col(stat)


************** Table 2: Material wealth and marital status - which families are more likely to have an unmarried adult child? (rural part)
* housewealth: 1 for houses made of concrete and bricks (h1_88 of 1 or 2),
* 0 for mud and other inferior materials (h1_88 of 0, 3 or 4).
recode h1_88 (0 3 4=0) (1 2 =1), gen(housewealth)

* Regressors shared by the three models.
global xsingle housewealth loginc hsize hage hsex hedu hmin badhealth

* All models use households whose head is older than 49 and younger than 61,
* with robust standard errors.
* a1: logit for an unmarried adult son, among households without an unmarried adult daughter
eststo a1: xi: logit singleson $xsingle if hage>49 & hage<61 & singlegirl==0, robust
* a2: logit for an unmarried adult daughter, among households without an unmarried adult son
eststo a2: xi: logit singlegirl $xsingle if hage>49 & hage<61 & singleson==0, robust
* a3: multinomial logit on single, excluding households that have both (single of 3)
eststo a3: xi: mlogit single $xsingle if hage>49 & hage<61 & single<3, robust

estout a1 a2 a3, cells(b(star fmt(%9.2f)) se(par)) drop(_cons) starlevels(* .10 ** .05) ///
    stats(r2_a aic  N, fmt(%9.2f %9.1f %9.0g)) replace
		

************* Table 4: Summary statistics on household savings in 2002 (rural)
* saving is the log of income over expenditure; statistics are weighted by
* household population.
local sumstats mean median max min sd n

* Nuclear families with one son, mother younger than 40
tabstat saving [aw=hpop] if oneson & mage<40, stat(`sumstats')
* Nuclear families with one daughter, mother younger than 40
tabstat saving [aw=hpop] if onegirl & mage<40, stat(`sumstats')
* All families
tabstat saving [aw=hpop], stat(`sumstats')

 
************** Table 5: Rural household-level savings for three-person households with a child in China
* OLS regressions with robust standard errors; the dependent variable is ln(Y/C).
global xlist sr loginc loginc2  age5 age10 age15 hage hsex hedu hmin badhealth gini

* Estimation samples in column order (nuclear families, mother younger than 40):
*   ra1-ra2  one son, one daughter: full sample
*   ra3-ra4  one son, one daughter: poor families dropped (income or expenditure below 2000 yuan)
*   ra5-ra6  one son, one daughter: bottom and top 5% of savers removed
*   ra7-ra8  as ra5-ra6, under the alternative child definition
local col = 0
foreach smp in "oneson40" "onegirl40" "oneson40 & poor==0" "onegirl40 & poor==0" ///
               "range1_oneson40" "range1_onegirl40" "range1_oneson40a" "range1_onegirl40a" {
    local ++col
    eststo ra`col': xi: quietly reg saving $xlist if `smp', r
}

estout ra1 ra2 ra3 ra4 ra5 ra6 ra7 ra8, cells(b(star fmt(%9.2f)) se(par)) drop(_cons) starlevels(* .10 ** .05) ///
    stats(r2_a aic  N, fmt(%9.2f %9.1f %9.0g) ) replace
		

************** Table 6: Robustness checks - rural household-level savings
* Upper part of the table: nuclear one-child families with mother's age below 45.
* Only the coefficient on sr is exported. Each estout call appends to
* table6.xls in the folder named by the global macro result.

* OLS regressions of ln(Y/C), robust standard errors. Samples in column order:
*   rb1-rb2   one son, one daughter: full sample
*   rb3-rb4   poor families dropped (income or expenditure below 2000 yuan)
*   rb5-rb6   bottom and top 1% of savers removed
*   rb7-rb8   bottom and top 5% of savers removed
*   rb9-rb10  as rb7-rb8, under the alternative child definition
local col = 0
foreach smp in "oneson45" "onegirl45" "oneson45 & poor==0" "onegirl45 & poor==0" ///
               "range2_oneson45" "range2_onegirl45" "range1_oneson45" "range1_onegirl45" ///
               "range1_oneson45a" "range1_onegirl45a" {
    local ++col
    eststo rb`col': xi: quietly reg saving $xlist if `smp', r
}
estout rb* using $result\table6.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par ))   starlevels(* .10 ** .05 *** .01)  append

* Median regressions on the same ten samples (ma1-ma10, same column order).
local col = 0
foreach smp in "oneson45" "onegirl45" "oneson45 & poor==0" "onegirl45 & poor==0" ///
               "oneson45 & range2_oneson45" "onegirl45 & range2_onegirl45" ///
               "range1_oneson45" "range1_onegirl45" "range1_oneson45a" "range1_onegirl45a" {
    local ++col
    eststo ma`col': xi: quietly qreg saving $xlist if `smp'
}
estout ma* using $result\table6.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par ))   starlevels(* .10 ** .05 *** .01)  append
 	         

   ************ Lower part of the table: Extended families with one child (household head age<45)
* Regressors for the extended-family part of Table 6; unlike xlist this list
* also contains hsize.
global xlistb sr loginc loginc2  age5 age10 age15 hage hsize hsex hedu hmin badhealth  gini

* OLS regressions of ln(Y/C), robust standard errors, extended one-child
* families with mother's age below 45. Samples in column order:
*   eb1-eb2   one son, one daughter: full sample
*   eb3-eb4   poor families dropped (income or expenditure below 2000 yuan)
*   eb5-eb6   bottom and top 1% of savers removed
*   eb7-eb8   bottom and top 5% of savers removed
*   eb9-eb10  as eb7-eb8, under the alternative child definition
local col = 0
foreach smp in "oneson1_45" "onegirl1_45" "oneson1_45 & poor==0" "onegirl1_45 & poor==0" ///
               "range2_oneson1_45" "range2_onegirl1_45" "range1_oneson1_45" "range1_onegirl1_45" ///
               "range1_oneson1_45a" "range1_onegirl1_45a" {
    local ++col
    eststo eb`col': xi: quietly reg saving $xlistb if `smp', r
}
estout eb* using $result\table6.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par ))   starlevels(* .10 ** .05 *** .01)  append

* Median regressions on the same ten samples (em1-em10, same column order).
local col = 0
foreach smp in "oneson1_45" "onegirl1_45" "oneson1_45 & poor==0" "onegirl1_45 & poor==0" ///
               "range2_oneson1_45" "range2_onegirl1_45" "range1_oneson1_45" "range1_onegirl1_45" ///
               "range1_oneson1_45a" "range1_onegirl1_45a" {
    local ++col
    eststo em`col': xi: quietly qreg saving $xlistb if `smp'
}
estout em* using $result\table6.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par ))   starlevels(* .10 ** .05 *** .01)  append
			 
				 
************ Table 7: Pooled sample - rural household-level savings in 2002 *************
* The pooled specification adds the son count and the interaction sonsr to
* the regressors. This redefines the global xlist used by the earlier tables.
global xlist sr sonsr son loginc loginc2 age5 age10 age15 hsize hage  hsex hedu hmin badhealth gini

* Extended families with one child: OLS with robust standard errors, then median regression
eststo db3: xi: regress saving $xlist if child1, robust
eststo db4: xi: qreg saving $xlist if child1
* Nuclear families with one child and mother younger than 40: OLS, then median regression
eststo db5: xi: regress saving $xlist if onechild40, robust
eststo db6: xi: qreg saving $xlist if onechild40

estout db*, cells(b(star fmt(%9.2f)) se(par)) starlevels(* .10 ** .05) ///
    stats(N, fmt(%9.0g) ) replace
	 
	 
************* Table 8: Rural household-level savings in 2002, with interactions of income quartiles and the local sex ratio *********
* pinc1-pinc4 are the sex ratio interacted with the four within-county income
* groups (qinc was built with four groups, i.e. quartiles). The global z holds
* the dependent variable followed by the regressors.
global z saving pinc1 pinc2 pinc3 pinc4 loginc*  age5 age10 age15 hage hsex hedu hmin badhealth gini

* r1, r2: nuclear one-son and one-daughter families with mother younger than 40
eststo r1: xi: regress $z if oneson==1 & mage<40, robust
eststo r2: xi: regress $z if onegirl==1 & mage<40, robust

* r3, r4: the same families with the bottom and top 5% of savers trimmed.
* NOTE(review): these two columns restrict on head age (hage below 40) while
* r1 and r2 restrict on mother's age (mage below 40), and the trimming flags
* are the ones computed on all one-son / one-daughter families rather than on
* the under-40 subsample. Confirm that both differences are intended.
eststo r3: xi: regress $z if oneson==1 & hage<40 & range1_oneson, robust
eststo r4: xi: regress $z if onegirl==1 & hage<40 & range1_onegirl, robust

estout r1 r2 r3 r4, cells(b(star fmt(%9.2f)) se(par)) drop(_cons) starlevels(* .10 ** .05 *** .01) ///
    stats(r2_a  aic N, fmt(%9.2f %9.1f %9.0g) ) replace

 